

***regression
* Load the regression sample and attach the employee-level shock file,
* keeping matched observations only.
use "$data/regression_sample", clear
merge m:1 iris using "$data/matching/employee_shocks", keep(3) nogen

* Event time relative to the shock year. Observations with treated==0 are
* assigned tau = -1.
gen tau=year-year_shock
replace tau=-1 if treated==0

* Lead dummies periodm13 ... periodm1. They are created in descending order
* so that the varlist range periodm7-periodm1 used in the event study picks
* up the intended variables from the dataset order.
forv i=13(-1)1 {
	gen byte periodm`i'=(tau==-`i')
}

* Lag dummies period0 ... period15.
forv i=0/15 {
	gen byte period`i' = (tau==`i')
}

* Binned endpoints and the post indicator.
* Stata treats a missing value as larger than any number, so an unguarded
* tau>0 or tau>=5 is true when tau is missing. The upper-tail comparisons are
* guarded so that a missing tau is coded 0, like every other dummy above.
gen periodm5b=(tau<=-5)
gen periodm7b=(tau<=-7)
gen period5a=(tau>=5 & !missing(tau))
gen period7a=(tau>=7 & !missing(tau))
gen byte post=(tau>0 & !missing(tau))

* Earliest year with a non-missing log_expenditure, computed within iris.
gen temp = year if log_expenditure != .
capture drop minyear
bysort iris: egen minyear = min(temp)

* Count of observations with a non-missing log_expenditure, within emp_num.
bysort emp_num: egen nyear = total(log_expenditure != .)

* log(1 + x) transforms of the direct expenditure totals.
gen log_expenditure_control = log(1 + total_direct_expenditure)
gen log_expenditure_federal = log(1 + total_direct_federal)
gen log_expenditure_private = log(1 + total_direct_private)

* Broad field category derived from fieldid. Any fieldid not listed here,
* including a missing one, falls into Other.
gen fieldcat = cond(inlist(fieldid, 4, 5, 12, 15, 16), "Science", ///
	cond(fieldid == 8, "Engineering", ///
	cond(inlist(fieldid, 2, 13), "Medicine", "Other")))

* Attach PI identifiers by iris and year. Unmatched master rows are kept.
* Forward slashes are used in the path because they work on every platform
* Stata runs on and match the other forward-slash paths in this file.
merge 1:1 iris year using "$data/pi", keep(1 3) nogen

* Within each emp_num, in year order, fill a missing pinumber with the value
* from the previous row. replace works through the rows in order, so a run of
* missing values is filled from the last known pinumber.
bysort emp_num (year): replace pinumber=pinumber[_n-1] if pinumber==.

* Rows that are still missing (no earlier pinumber for that emp_num) are
* coded 0.
replace pinumber=0 if pinumber==.

*by citations
* High-citation patents are the sum of citation quartiles 3 and 4. Each low-*
* count below is the total count minus the matching high-* count.
gen npatent_highcitation=npatent_quartile3+npatent_quartile4
gen npatent_lowcitation=npatent-npatent_highcitation
gen npatent_loworiginality=npatent-npatent_highoriginality
gen npatent_lowgenerality=npatent-npatent_highgenerality

* Indicator for at least one publication. A missing value compares greater
* than any number in Stata, so the guard keeps pub missing when npub is
* missing instead of coding it as 1.
gen pub=(npub>0) if !missing(npub)
gen npub_lowjif=npub-npub_highjif
gen npub_lowcite1=npub-npub_highcite1
gen npub_lowcite2=npub-npub_highcite2

* Declare the panel structure: emp_num is the unit, year is the time variable.
tsset emp_num year

/**************************************
***********REGRESSIONS***************
**************************************/

* Outcome variables shared by the regression loops in this file.
global Y patent npatent npatent_lowcitation npatent_highcitation npatent_lowgenerality npatent_highgenerality pub npub fcites_3yr npub_highcite2 npub_lowcite2 npub_highjif npub_lowjif npub_basic npub_applied npub_patent log_expenditure_federal log_expenditure_private log_expenditure share_federal share_private

*Diff in diff regressions (Table 2, 4, 5, 7)
* The same specification is estimated twice: first with person fixed effects
* (emp_num), then with PI fixed effects (pinumber). The first pass rewrites
* the CSV and the second pass appends to it. Standard errors are clustered
* on emp_num in both passes.
* Output paths use forward slashes so the script is not tied to Windows.
local write_mode replace
foreach fe in emp_num pinumber {
	eststo clear
	foreach var of global Y {
		eststo `var': reghdfe `var' post if tau^2<50 & include==1, ///
			a(i.year#i.submit_u#i.fieldid i.`fe') vce(cluster emp_num)
	}
	esttab using "$data/results/pretrend/march2021/diffindiff.csv", `write_mode' ///
		numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)
	local write_mode append
}


*lab level analysis (Table A4)

* Range of treat inside each PI-year cell. The regressions below keep only
* cells where maxtreat==mintreat, that is, cells in which treat is constant.
* NOTE(review): treat is not created in this section; it is either an
* existing variable or an abbreviation of treated. Confirm which is intended.
bysort pinumber year: egen maxtreat=max(treat)
bysort pinumber year: egen mintreat=min(treat)

* Number the rows inside each PI-year cell; count==1 tags the single row per
* cell used in the regressions. Ordering by emp_num inside the cell makes the
* tagged row the same on every run. Without it the order inside a cell is
* arbitrary, and row-level variables such as post can differ between the
* rows of a cell.
bysort pinumber year (emp_num): gen count=_n

*merge lab-level size and innovation measures
merge m:1 pinumber year using "$data/lablevel", keep(1 3) nogen

gen log_expenditure_lab=log(expenditure_lab)
gen log_expenditure_fed_lab=log(expenditure_federal_lab)
* A missing value compares greater than any number in Stata, so the guards
* keep the indicators missing when the underlying count is missing instead
* of coding them as 1.
gen patent_lab=(npatent_lab>0) if !missing(npatent_lab)
gen pub_lab=(npub_lab>0) if !missing(npub_lab)

eststo clear

* One regression per lab-level outcome, with PI fixed effects and standard
* errors clustered on pinumber.
foreach var in labsize patent_lab npatent_lab pub_lab npub_lab {
	eststo: reghdfe `var' post if tau^2<50 & labsize<100 & count==1 & maxtreat==mintreat, ///
		a(i.year#i.submit_u#i.fieldid i.pinumber) vce(cluster pinumber)
}

* NOTE(review): append adds a new copy of the table on every run. It is kept
* because another script may write to this file first; switch to replace if
* this is the only writer.
esttab using "$data/results/pretrend/march2021/labsize.csv", append ///
	numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)


*heterogeneity by occupation (Table 3)

* Interactions of post with each occupation group. They are not used by the
* regressions in this section, which split the sample instead.
gen post_occfaculty=post*(umetrics_occ=="Faculty")
gen post_occgrad=post*(umetrics_occ=="Graduate Student")
gen post_occundergrad=post*(umetrics_occ=="Undergraduate")
gen post_occstaff=post*(umetrics_occ=="Staff")

* Suffix of the stored estimate name mapped to the umetrics_occ value that
* defines the subsample.
local occ_f "Faculty"
local occ_g "Graduate Student"
local occ_u "Undergraduate"
local occ_s "Staff"

* Split-sample diff-in-diff by occupation, first with person fixed effects
* (emp_num), then with PI fixed effects (pinumber). The first pass rewrites
* the CSV and the second pass appends to it.
* Output paths use forward slashes so the script is not tied to Windows.
local write_mode replace
foreach fe in emp_num pinumber {
	eststo clear
	foreach var of global Y {
		foreach tag in f g u s {
			eststo `var'_`tag': reghdfe `var' post if tau^2<50 & umetrics_occ=="`occ_`tag''" & include==1, ///
				a(i.year#i.submit_u#i.fieldid i.`fe') vce(cluster emp_num)
		}
	}
	esttab using "$data/results/pretrend/march2021/diffindiff_occ.csv", `write_mode' ///
		numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)
	local write_mode append
}


*heterogeneity by field

* Shorten the field categories to one letter so they can be used as the
* suffix of the stored estimate names.
replace fieldcat="E" if fieldcat=="Engineering"
replace fieldcat="M" if fieldcat=="Medicine"
replace fieldcat="S" if fieldcat=="Science"
replace fieldcat="O" if fieldcat=="Other"

* Split-sample diff-in-diff by field category, first with person fixed
* effects (emp_num), then with PI fixed effects (pinumber). The first pass
* rewrites the CSV and the second pass appends to it.
* Output paths use forward slashes so the script is not tied to Windows.
local write_mode replace
foreach fe in emp_num pinumber {
	eststo clear
	foreach var of global Y {
		foreach field in E M S O {
			eststo `var'_`field': reghdfe `var' post if tau^2<50 & fieldcat=="`field'" & include==1, ///
				a(i.year#i.submit_u#i.fieldid i.`fe') vce(cluster emp_num)
		}
	}
	esttab using "$data/results/pretrend/march2021/diffindiff_field.csv", `write_mode' ///
		numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)
	local write_mode append
}


*cluster by university field (Table A5)

* Department identifier: one group per combination of submit_u and fieldid.
egen department=group(submit_u fieldid)

* Attach the main_cfda file, which is keyed on new_emp, by temporarily
* renaming iris. Unmatched master rows are kept.
* NOTE(review): the key is renamed to iris_employee_number afterwards, so the
* variable iris no longer exists from this point on.
ren iris new_emp
merge m:1 new_emp using "$data/main_cfda", keep(1 3) nogen
ren new_emp iris_employee_number
tsset emp_num year

* Baseline diff-in-diff with standard errors clustered on department, first
* with person fixed effects (emp_num), then with PI fixed effects (pinumber).
* The first pass rewrites the CSV and the second pass appends to it.
* Output paths use forward slashes so the script is not tied to Windows.
local write_mode replace
foreach fe in emp_num pinumber {
	eststo clear
	foreach var of global Y {
		eststo `var': reghdfe `var' post if tau^2<50 & include==1, ///
			a(i.year#i.submit_u#i.fieldid i.`fe') vce(cluster department)
	}
	esttab using "$data/results/pretrend/march2021/diffindiff_clusterdep.csv", `write_mode' ///
		numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)
	local write_mode append
}


*cluster by main cfda (Table A6)

* Baseline diff-in-diff with standard errors clustered on main_cfda, first
* with person fixed effects (emp_num), then with PI fixed effects (pinumber).
* The first pass rewrites the CSV and the second pass appends to it.
* Output paths use forward slashes so the script is not tied to Windows.
local write_mode replace
foreach fe in emp_num pinumber {
	eststo clear
	foreach var of global Y {
		eststo `var': reghdfe `var' post if tau^2<50 & include==1, ///
			a(i.year#i.submit_u#i.fieldid i.`fe') vce(cluster main_cfda)
	}
	esttab using "$data/results/pretrend/march2021/diffindiff_clustercfda.csv", `write_mode' ///
		numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)
	local write_mode append
}


*event study (Figure 3, 5, 6, 7, 9, A5)

* Leads periodm7 to periodm1 and lags period1 to period7. period0 is left
* out and serves as the reference period.
* The binned dummy period5a (tau>=5) is no longer a regressor in the person
* fixed-effects pass. For integer event time the restriction tau^2<50 keeps
* tau between -7 and 7, so in that sample period5a equals
* period5+period6+period7 and is perfectly collinear with them. Both passes
* now use the same regressors.
* The first pass rewrites the CSV and the second pass appends to it, as in
* the other sections. Previously the first pass also appended, so every run
* added another copy of the tables.
* Output paths use forward slashes so the script is not tied to Windows.
local write_mode replace
foreach fe in emp_num pinumber {
	eststo clear
	foreach var of global Y {
		eststo: reghdfe `var' periodm7-periodm1 period1-period7 if tau^2<50 & include==1, ///
			a(i.year#i.submit_u#i.fieldid i.`fe') vce(cluster emp_num)
	}
	esttab using "$data/results/pretrend/march2021/eventstudy.csv", `write_mode' ///
		numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)
	local write_mode append
}



*balance test (Table A1)

tsset emp_num year

* Occupation indicators used as balance outcomes.
gen gradstudent=(umetrics_occ=="Graduate Student")
gen undergrad=(umetrics_occ=="Undergraduate")

eststo clear

* Regress each characteristic on treated, using observations before the
* shock (tau<0) with share_federal>0 and include==1. Year by submit_u by
* fieldid cells are absorbed and standard errors are clustered on emp_num.
* NOTE(review): share_federal>0 is also true when share_federal is missing;
* confirm whether such rows should be in the sample.
foreach var in share_federal share_private avg_log_expenditure faculty gradstudent undergrad patent npatent pub npub {
	eststo: reghdfe `var' treated if tau<0 & share_federal>0&include==1, ///
		absorb(i.year#i.submit_u#i.fieldid) vce(cluster emp_num)
}

* Output path uses forward slashes so the script is not tied to Windows.
* NOTE(review): append adds a new copy of the table on every run. It is kept
* because another script may write to this file first; switch to replace if
* this is the only writer.
esttab using "$data/results/nov2021/balance_test.csv", append ///
	numbers label nogaps r2 ar2 star(* 0.1 ** 0.05 *** 0.01) b(%8.4f) se(%8.4f)


